Dengue susceptibility analysis

Author

Leo Bastos

Intro

This analysis aims to show in a naive way how many people are susceptible to dengue and its serotypes in Brazil.

Reading data

Code
pacman::p_load(tidyverse, readxl, geobr)

# Dados do SINAN
denvbr <- read_csv("data/denvbr20152024.csv.gz")

# Populacao CENSO
pop22 <- readxl::read_xlsx(path = "data/POP2022 CD2022_Populacao_Coletada_Imputada_e_Total_Municipio_e_UF.xlsx", 
                           sheet = "Municípios",
                           range = "B3:H5573"
)

rgi <- read_xlsx(path = "data/regioes_geograficas_composicao_por_municipios_2017_20180911.xlsx")

BR0 <- read_municipality()

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
Code
BR_states <- read_state()

  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
Code
data <- pop22 |> transmute(
  UF,
  coduf = `COD. UF`,
  codmun7 = paste0(`COD. UF`,`COD. MUNIC`),
  codmun6 = substr(codmun7,1,6),
  pop = `POP. TOTAL`
)

aux2 <- denvbr |> 
  mutate(SOROTIPO = ifelse(is.na(SOROTIPO),SOROTIPO, paste0("DENV",SOROTIPO))) |> 
  group_by(codmun6 = as.character(ID_MN_RESI)) |> 
  mutate(casos = n()) |> 
  group_by(codmun6, SOROTIPO) |> 
  summarise(
    n = n(),
    casos = casos[1]
  ) |> 
  pivot_wider(values_from = n, names_from = SOROTIPO)

data <- data |> left_join(aux2) 

Distribution of the proportion of cases in the last 10 years divided by the 2022 population according to population size.

Code
density_br <- data |> 
  mutate(
    ratio = casos / pop,
    ratio = replace_na(ratio, 0),
    popsize = case_when(
      pop > 5e5 ~ "Population >500k",
      TRUE ~ "Population <= 500k"
    )
  ) |> 
  ggplot(aes(x = ratio, color = popsize)) + 
  geom_density() +
  labs(x = "Cases/Population", y = "density")+
  scale_x_continuous(n.breaks = 10)+
  theme_bw()
density_br

Serotype

Code
data <- data |> 
  left_join(rgi, by = c("codmun7"="CD_GEOCODI")) |> 
  mutate(
    DENV1 = replace_na(DENV1,0),
    DENV2 = replace_na(DENV2,0),
    DENV3 = replace_na(DENV3,0),
    DENV4 = replace_na(DENV4,0),
    DENVT = DENV1 + DENV2 + DENV3 + DENV4,
    DENVT = ifelse(DENVT==0,1,DENVT)
  ) |> 
  # Calcular proporcoes de sorotipo segundo RGI
  group_by(cod_rgi) |> 
  mutate(
    DENV1p = sum(DENV1) / sum(DENVT), 
    DENV2p = sum(DENV2) / sum(DENVT), 
    DENV3p = sum(DENV3) / sum(DENVT), 
    DENV4p = sum(DENV4) / sum(DENVT) 
  ) |> ungroup() |> 
  # Se a populacao for maior que 100k e ter em 10 anos mais de 100 confirmados
  mutate(
    ratio = casos / pop,
    ratio = replace_na(ratio, 0),
    DENV1p = if_else(pop >= 100000 & DENVT > 100, DENV1/ DENVT, DENV1p), 
    DENV2p = if_else(pop >= 100000 & DENVT > 100, DENV2/ DENVT, DENV2p), 
    DENV3p = if_else(pop >= 100000 & DENVT > 100, DENV3/ DENVT, DENV3p), 
    DENV4p = if_else(pop >= 100000 & DENVT > 100, DENV4/ DENVT, DENV4p)
  ) |>  
  ## Following the Table S5 for the Spearman's correlation for serotype-specific FOI
  ## Antigenic evolution of dengue viruses over 20 years
  ## https://www.science.org/doi/abs/10.1126/science.abk0058
  mutate(
    DENV_1_2 = if_else(pop >= 100000 & DENVT > 100, 
                       0.23*(DENV1/ if_else(DENV2 == 0, 1, DENV2))/DENVT, 
                       0), 
    DENV_1_3 = if_else(pop >= 100000 & DENVT > 100, 
                       0.17*(DENV1/ if_else(DENV3 == 0, 1, DENV3))/DENVT, 
                       0), 
    DENV_1_4 = if_else(pop >= 100000 & DENVT > 100, 
                       0.09*(DENV1/ if_else(DENV4 == 0, 1, DENV4))/DENVT, 
                       0), 
    DENV_1_4 = if_else(pop >= 100000 & DENVT > 100, 
                       0.09*(DENV1/ if_else(DENV4 == 0, 1, DENV4))/DENVT, 
                       0), 
    DENV_2_3 = if_else(pop >= 100000 & DENVT > 100, 
                       0.01*(DENV2/ if_else(DENV3 == 0, 1, DENV3))/DENVT, 
                       0), 
    DENV_2_4 = if_else(pop >= 100000 & DENVT > 100, 
                       -0.19*(DENV2/ if_else(DENV4 == 0, 1, DENV4))/DENVT, 
                       0), 
    DENV_3_4 = if_else(pop >= 100000 & DENVT > 100, 
                       -0.31*(DENV3/ if_else(DENV4 == 0, 1, DENV4))/DENVT, 
                       0), 
  )

BR <- BR0 |> 
  left_join( data |> 
               mutate(code_muni = as.numeric(codmun7))
  )

Maps

Code
denv_br <- BR |> 
  mutate(ratio = replace_na(ratio, 0)) |>
  ggplot(aes(fill = 1 - ratio)) + 
  geom_sf(color = NA) + 
  geom_sf(data = BR_states,
          color = "black",
          fill = "transparent")+
  colorspace::scale_fill_continuous_sequential(name = "Dengue outbreak \n Risk",
                                               # rev = F,
                                               palette = "Purple-Yellow")+
  ggthemes::theme_map()
denv_br

Code
ggsave(filename = "Fig/denv_risk_outbreak.png",
       plot = denv_br,
       width = 16,
       height = 12,
       dpi = 200)

# BR |> 
#   mutate(ratio.cat = cut(ratio, 
#                          breaks = c(0,.2,.5,1.1),
#                          labels = c("<20%", "20-50%", ">50%")
#                          )
#          ) |> 
#   ggplot(aes(fill = ratio.cat)) + 
#   geom_sf(color = NA) + 
# theme_void()

DENV1

Code
BR |> 
  mutate(ratio = replace_na(ratio, 0) * DENV1p ) |> 
  ggplot(aes(fill = ratio)) + 
  geom_sf(color = NA) + 
  colorspace::scale_fill_continuous_sequential(palette = "inferno") +
  theme_void()

DENV2

Code
BR |> 
  mutate(ratio = replace_na(ratio, 0) * DENV2p ) |> 
  ggplot(aes(fill = ratio)) + 
  geom_sf(color = NA) + 
  colorspace::scale_fill_continuous_sequential(palette = "inferno") +
  theme_void()

DENV3

Code
BR |> 
  mutate(ratio = replace_na(ratio, 0) * DENV3p ) |> 
  ggplot(aes(fill = ratio)) + 
  geom_sf(color = NA) + 
  colorspace::scale_fill_continuous_sequential(palette = "inferno") +
  theme_void()

DENV4

Code
BR |> 
  mutate(ratio = replace_na(ratio, 0) * DENV4p ) |> 
  ggplot(aes(fill = ratio)) + 
  geom_sf(color = NA) + 
  colorspace::scale_fill_continuous_sequential(palette = "inferno") +
  theme_void()

Risk maps

Code
# breaks_risk <- seq(0.5, 1, 0.1)
# labels_risk <- c("0-50%", "60%", "70%", "80%", "90%", "100%")

# sf::insf_use_s2(FALSE)
# 
# geom_rgi <- rgi |> 
#   left_join(BR0 |> 
#               mutate(code_muni = as.character(code_muni)), 
#             by = c("CD_GEOCODI" = "code_muni")) |> 
#   group_by(cod_rgi) |> 
#   summarise(geometry = geom |> 
#               sf::st_union() |> 
#               sfheaders::sf_remove_holes()) |> 
#   sf::st_as_sf() |> 
#   left_join(rgi)
# 
# data_rgi <- geom_rgi |> 
#   left_join(data,
#             by = c("CD_GEOCODI" = "codmun7"))

data_br <- BR |> 
  mutate(ratio_denv1 = replace_na(ratio, 0) * DENV1p,
         ratio_denv2 = replace_na(ratio, 0) * DENV2p,
         ratio_denv3 = replace_na(ratio, 0) * DENV3p,
         ratio_denv4 = replace_na(ratio, 0) * DENV4p)

denv1.risk <- data_br |> 
  ggplot(aes(fill = 1 - ratio_denv1)) + 
  geom_sf(color = NA) + 
  geom_sf(data = BR_states,
          color = "black",
          fill = "transparent")+
  colorspace::scale_fill_continuous_sequential(name = "Outbreak \n Risk",
                                               # rev = F,
                                               palette = "Purple-Yellow")+
  ggthemes::theme_map()+
  labs(title = "DENV1")
denv1.risk

Code
denv2.risk <- data_br |> 
  ggplot(aes(fill = 1 - ratio_denv2)) + 
  geom_sf(color = NA) + 
  geom_sf(data = BR_states,
          color = "black",
          fill = "transparent")+
  colorspace::scale_fill_continuous_sequential(name = "Outbreak \n Risk",
                                               # rev = F,
                                               palette = "Purple-Yellow")+
  ggthemes::theme_map()+
  labs(title = "DENV2")
denv2.risk

Code
denv3.risk <- data_br |> 
  ggplot(aes(fill = 1 - ratio_denv3)) + 
  geom_sf(color = NA) + 
  geom_sf(data = BR_states,
          color = "black",
          fill = "transparent")+
  colorspace::scale_fill_continuous_sequential(name = "Outbreak \n Risk",
                                               # rev = F,
                                               palette = "Purple-Yellow")+
  ggthemes::theme_map()+
  labs(title = "DENV3")
denv3.risk

Code
denv4.risk <- data_br |> 
  ggplot(aes(fill = 1 - ratio_denv4)) + 
  geom_sf(color = NA) + 
  geom_sf(data = BR_states,
          color = "black",
          fill = "transparent")+
  colorspace::scale_fill_continuous_sequential(name = "Outbreak \n Risk",
                                               # rev = F,
                                               palette = "Purple-Yellow")+
  ggthemes::theme_map()+
  labs(title = "DENV4")
denv4.risk

Code
library(patchwork)

patchwork_risk <-((denv1.risk | denv2.risk) / (denv3.risk | denv4.risk))
patchwork_risk

Code
ggsave(filename = "Fig/denv_serotype_risk_outbreak.png",
       plot = patchwork_risk,
       width = 16,
       height = 12,
       dpi = 200)

A model to smooth out the cases

In an attempt to have more smooth estimates of cases we employed the following model, \(Y_i\) is the number of cases at the municipality \(i\):

\[ Y_i \sim Poisson(\lambda_i) \\ log(\lambda_i) = \mu + \theta(\lambda_i) + \epsilon_i \\ \]

Where we a modeling the number of cases to have a smoothed version of them, to that end the \(\theta(\lambda_i)\) term is a CAR model with the following structure:

\[ \theta(\lambda_i) = \frac{1}{\sqrt\tau_b}(\sqrt{1-\phi} v(\lambda_i) + \sqrt\phi u(\lambda_i)) \]

where \(\tau_b\) is a precision parameter and \(\phi\) is the mixing parameter. Here, the \(𝑣(\lambda_i)\) are iid with a normal distribution with variance equal to one, and the \(𝑢(\lambda_i)\) parameters are assigned the Besag intrinsic conditional autoregressive (ICAR) model with variance equal to 1.

From that estimates we calculate as before the cases to population ratio and the ratios for each of the serotypes.

Code
sf::sf_use_s2(FALSE)
library(INLA)
compute_list <- control.compute(hyperpar = T, 
                                return.marginals = T, 
                                return.marginals.predictor = T,
                                 dic = T, 
                                cpo = T, 
                                waic = T)

predictor_list <- control.predictor(compute = T)

BR <- BR |> 
  mutate(ID = row_number())

graph_br <- spdep::poly2nb(BR0, queen = T)

nbmat <- spdep::nb2mat(graph_br, style = "W", zero.policy = T)

CAR_model <- inla(formula = as.formula(casos ~ 1+
                                         f(ID,
                                           model = "bym2",
                                           graph = nbmat,
                                           scale.model = TRUE,
                                           constr = TRUE,
                                           hyper = list(phi = list(prior = "pc",
                                                                   param = c(0.5 , 2/3),
                                                                   initial = -3),
                                                        prec = list (prior = "pc.prec",
                                                                     param = c(0.2/0.31, 0.01),
                                                                     initial = 5))
                                         )
),
data = BR,
offset = log(pop),
family = "nbinomial",
verbose = T,
control.compute = compute_list,
control.predictor = predictor_list)

data_br <- BR |> 
  cbind(cases_est = CAR_model$summary.fitted.values |> pull(mean)) |>
  mutate(ratio = replace_na(cases_est/pop, 0),
         ratio_denv1 = replace_na(ratio, 0) * DENV1p,
         ratio_denv2 = replace_na(ratio, 0) * DENV2p,
         ratio_denv3 = replace_na(ratio, 0) * DENV3p,
         ratio_denv4 = replace_na(ratio, 0) * DENV4p)

denv1.risk <- data_br |> 
  ggplot(aes(fill = 1 - ratio_denv1)) + 
  geom_sf(color = NA) + 
  geom_sf(data = BR_states,
          color = "black",
          fill = "transparent")+
  colorspace::scale_fill_continuous_sequential(name = "Outbreak \n Risk",
                                               # rev = F,
                                               palette = "Purple-Yellow")+
  ggthemes::theme_map()+
  labs(title = "DENV1")
denv1.risk

Code
denv2.risk <- data_br |> 
  ggplot(aes(fill = 1 - ratio_denv2)) + 
  geom_sf(color = NA) + 
  geom_sf(data = BR_states,
          color = "black",
          fill = "transparent")+
  colorspace::scale_fill_continuous_sequential(name = "Outbreak \n Risk",
                                               # rev = F,
                                               palette = "Purple-Yellow")+
  ggthemes::theme_map()+
  labs(title = "DENV2")
denv2.risk

Code
denv3.risk <- data_br |> 
  ggplot(aes(fill = 1 - ratio_denv3)) + 
  geom_sf(color = NA) + 
  geom_sf(data = BR_states,
          color = "black",
          fill = "transparent")+
  colorspace::scale_fill_continuous_sequential(name = "Outbreak \n Risk",
                                               # rev = F,
                                               palette = "Purple-Yellow")+
  ggthemes::theme_map()+
  labs(title = "DENV3")
denv3.risk

Code
denv4.risk <- data_br |> 
  ggplot(aes(fill = 1 - ratio_denv4)) + 
  geom_sf(color = NA) + 
  geom_sf(data = BR_states,
          color = "black",
          fill = "transparent")+
  colorspace::scale_fill_continuous_sequential(name = "Outbreak \n Risk",
                                               # rev = F,
                                               palette = "Purple-Yellow")+
  ggthemes::theme_map()+
  labs(title = "DENV4")
denv4.risk